In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
%matplotlib inline
from collections import Counter
import folium
import json
from itertools import islice
from html import HTML
from IPython.display import HTML

In [2]:
# Directory holding airports.csv and the per-year flight CSVs. Change this one
# value to point the notebook at a different copy of the data.
DATA_DIR = '/Users/harishannavajjala/Downloads/vipul'
# Only the first ROWS_PER_YEAR rows of every yearly file are read, so each
# year is represented by the head of its file rather than the full year.
ROWS_PER_YEAR = 400000

air = pd.read_csv(DATA_DIR + '/airports.csv')

# One DataFrame per yearly file, 1987.csv through 2008.csv, in that order.
yearly_flights = [
    pd.read_csv('{}/{}.csv'.format(DATA_DIR, year), nrows=ROWS_PER_YEAR)
    for year in range(1987, 2009)
]

# Keep the per-year names auto1 (1987) .. auto22 (2008): later cells use them.
(auto1, auto2, auto3, auto4, auto5, auto6, auto7, auto8,
 auto9, auto10, auto11, auto12, auto13, auto14, auto15, auto16,
 auto17, auto18, auto19, auto20, auto21, auto22) = yearly_flights

In [3]:
def inline_map(map):
    """
    Embeds the HTML source of the map directly into the IPython notebook.

    This method will not work if the map depends on any files (json data). Also this uses
    the HTML5 srcdoc attribute, which may not be supported in all browsers.

    Parameters
    ----------
    map : object
        Map object providing a ``_build_map()`` method and, once that has been
        called, an ``HTML`` attribute holding the page source as a string.

    Returns
    -------
    The result of wrapping an ``<iframe srcdoc=...>`` snippet in ``HTML``.
    """
    map._build_map()
    # The page source is placed inside a double-quoted attribute value, so both
    # '&' and '"' have to be escaped. '&' must go first: otherwise the '&quot;'
    # produced for quotes would be escaped again, and entities already present
    # in the page (e.g. '&lt;') would be decoded one level too early.
    srcdoc = map.HTML.replace('&', '&amp;').replace('"', '&quot;')
    iframe = '<iframe srcdoc="{srcdoc}" style="width: 100%; height: 510px; border: none"></iframe>'
    return HTML(iframe.format(srcdoc=srcdoc))

def embed_map(map, path="map.html"):
    """
    Write the map to ``path`` and return an iframe that links to that file.

    The notebook stores only the link, not the map source, so the written file
    has to stay available next to the notebook. Should work for any map as long
    as it uses relative urls.

    Parameters
    ----------
    map : object
        Map object providing ``create_map(path=...)``.
    path : str
        File name handed to ``create_map`` and referenced as ``files/<path>``.

    Returns
    -------
    The result of wrapping the ``<iframe src=...>`` snippet in ``HTML``.
    """
    # Produce the file first so the iframe below has something to point at.
    map.create_map(path=path)
    iframe_template = '<iframe src="files/{path}" style="width: 100%; height: 510px; border: none"></iframe>'
    snippet = iframe_template.format(path=path)
    return HTML(snippet)

In [4]:
# Collect the per-year frames once so both tallies are a short loop instead of
# 22 hand-written Counter terms per column.
yearly_flights = [
    auto1, auto2, auto3, auto4, auto5, auto6, auto7, auto8,
    auto9, auto10, auto11, auto12, auto13, auto14, auto15, auto16,
    auto17, auto18, auto19, auto20, auto21, auto22,
]

# Tally how often each airport code appears in the "Origin" and "Dest" columns
# across all loaded years. Updating one Counter in place avoids building a new
# intermediate Counter for every addition.
origin = Counter()
dest = Counter()
for flights in yearly_flights:
    origin.update(flights["Origin"])
    dest.update(flights["Dest"])

# Turn each tally into a two-column frame: airport code ('iata') and 'count'.
df_origin = pd.DataFrame.from_dict(origin, orient='index').reset_index()
df_origin = df_origin.rename(columns={'index': 'iata', 0: 'count'})

df_dest = pd.DataFrame.from_dict(dest, orient='index').reset_index()
df_dest = df_dest.rename(columns={'index': 'iata', 0: 'count'})

# Attach the airport metadata. The default inner join drops codes that do not
# appear in the airports table.
origin_results = pd.merge(left=df_origin, right=air, on='iata')
dest_results = pd.merge(left=df_dest, right=air, on='iata')

# Order by count, largest first. sort_values replaces the deprecated
# sort_index(by=...) spelling, which newer pandas releases no longer accept.
sorted_origin = origin_results.sort_values(by='count', ascending=False)
sorted_dest = dest_results.sort_values(by='count', ascending=False)

In [5]:
# First rows of sorted_origin, which is ordered by descending 'count'.
sorted_origin.head()


Out[5]:
iata count airport city state country lat long
154 ATL 445149 William B Hartsfield-Atlanta Intl Atlanta GA USA 33.640444 -84.426944
225 ORD 442835 Chicago O'Hare International Chicago IL USA 41.979595 -87.904464
208 DFW 367466 Dallas-Fort Worth International Dallas-Fort Worth TX USA 32.895951 -97.037200
64 LAX 308175 Los Angeles International Los Angeles CA USA 33.942536 -118.408074
63 PHX 281311 Phoenix Sky Harbor International Phoenix AZ USA 33.434167 -112.008056

In [6]:
# Last rows of sorted_origin, i.e. the smallest 'count' values.
sorted_origin.tail()


Out[6]:
iata count airport city state country lat long
237 MKK 30 Molokai Kaunakakai HI USA 21.152886 -157.096256
246 PSE 22 Mercedita Ponce PR USA 18.008303 -66.563012
184 CKB 6 Benedum Clarksburg WV USA 39.296639 -80.228083
317 OGD 1 Ogden-Hinckley Ogden UT USA 41.195944 -112.012175
250 CYS 1 Cheyenne Cheyenne WY USA 41.155722 -104.811838

In [7]:
# First rows of sorted_dest, which is ordered by descending 'count'.
sorted_dest.head()


Out[7]:
iata count airport city state country lat long
225 ORD 466028 Chicago O'Hare International Chicago IL USA 41.979595 -87.904464
154 ATL 463976 William B Hartsfield-Atlanta Intl Atlanta GA USA 33.640444 -84.426944
208 DFW 381855 Dallas-Fort Worth International Dallas-Fort Worth TX USA 32.895951 -97.037200
63 LAX 313721 Los Angeles International Los Angeles CA USA 33.942536 -118.408074
62 PHX 289144 Phoenix Sky Harbor International Phoenix AZ USA 33.434167 -112.008056

In [8]:
# Last rows of sorted_dest, i.e. the smallest 'count' values.
sorted_dest.tail()


Out[8]:
iata count airport city state country lat long
247 PSE 26 Mercedita Ponce PR USA 18.008303 -66.563012
185 CKB 6 Benedum Clarksburg WV USA 39.296639 -80.228083
318 OGD 4 Ogden-Hinckley Ogden UT USA 41.195944 -112.012175
251 CYS 1 Cheyenne Cheyenne WY USA 41.155722 -104.811838
234 PVU 1 Provo Muni Provo UT USA 40.219194 -111.723361

In [9]:
# Round-trip each sorted frame through JSON to obtain plain dicts keyed by row
# label, each wrapped in a one-element list for the map cells to iterate over.
airports_origin = [json.loads(sorted_origin.to_json(orient='index'))]
airports_dest = [json.loads(sorted_dest.to_json(orient='index'))]
Busiest Airports by Origin

In [10]:
origin_map = folium.Map(location=[40, -99], zoom_start=4)
ARR_COLOR = '#2171b5'

# airports_origin is a list of {row label: airport record} dicts; draw one
# circle per record, sized from its flight count.
for airport_table in airports_origin:
    for record in airport_table.values():
        lat = float(record['lat'])
        lon = float(record['long'])
        # Radius is (count / 3) * 2, evaluated in exactly that order.
        scaled_count = int(record['count']) / 3
        popup_text = (
            ", ".join([str(record['airport']), str(record['city'])])
            + " Total flights: "
            + str(record['count'])
        )
        origin_map.circle_marker(
            location=[lat, lon],
            radius=scaled_count * 2,
            popup=popup_text,
            line_color=ARR_COLOR,
            fill_color=ARR_COLOR,
        )

inline_map(origin_map)


Out[10]:
Busiest Airports by Destination

In [11]:
dest_map = folium.Map(location=[40, -99], zoom_start=4)
DEP_COLOR = '#238b45'

# airports_dest is a list of {row label: airport record} dicts; draw one
# circle per record, sized from its flight count.
for airport_table in airports_dest:
    for record in airport_table.values():
        lat = float(record['lat'])
        lon = float(record['long'])
        # Radius is (count / 3) * 2, evaluated in exactly that order.
        scaled_count = int(record['count']) / 3
        popup_text = (
            ", ".join([str(record['airport']), str(record['city'])])
            + " Total flights: "
            + str(record['count'])
        )
        dest_map.circle_marker(
            location=[lat, lon],
            radius=scaled_count * 2,
            popup=popup_text,
            line_color=DEP_COLOR,
            fill_color=DEP_COLOR,
        )

inline_map(dest_map)


Out[11]:

In [ ]: